###########################################################
#### Replication Script for Main Results
#### Uses: MainS3.csv (Baseline) and MainS3.1.csv (Citation Level)
####
#### Disclosure Classification:
####   Q0        = No Disclosure (no code found)
####   Q1 + Q2   = Moderate Disclosure  
####   Q3 + Q4   = High Disclosure
###########################################################

library(data.table)
library(dplyr)
library(readr)
library(lfe)

# =============================================================================
# LOAD REPOSITORY DATA
# Set repo_dir to the folder that holds the two replication CSV files; both
# are read with readr::read_csv() and kept as separate data frames.
# =============================================================================
repo_dir <- "YourWorkingDirectory"

# Full paths to the two inputs, built once so the reads below stay short.
baseline_csv_path <- file.path(repo_dir, "MainS3.csv")
citation_csv_path <- file.path(repo_dir, "MainS3.1.csv")

# Baseline data (Section 3): used by every BaselineData regression below.
BaselineData <- read_csv(baseline_csv_path)

# Citation-level data (Section 3.1): used by the citation-level regressions.
CitationData <- read_csv(citation_csv_path)

# Section banner; with sep = "" the three pieces are emitted back to back.
cat(
  "\n===========================================\n",
  "SECTION 3: BASELINE REGRESSIONS\n",
  "===========================================\n",
  sep = ""
)

# =============================================================================
# TABLE: Main Effect of Digital Disclosure on Geographic Diffusion
# Dependent Variable: Log(Mean Distance + 1)
#
# All four models share one felm() specification,
#   log(MeanDistance + 1) ~ Focal + log(Citations) + log(NClaims)
#     | MatchID | 0 | MatchID
# i.e. MatchID fixed effects, no instrumented variables (the 0 slot), and
# standard errors clustered on MatchID. Only the estimation sample changes,
# following the disclosure classes from the file header:
#   Q0 = none, Q1/Q2 = moderate, Q3/Q4 = high.
# Each model is fitted and then reported immediately.
# =============================================================================

cat("\n--- Table : Mean Distance Regressions ---\n")

# Specification 1: full baseline sample
m1 <- felm(
  log(MeanDistance + 1) ~ Focal + log(Citations) + log(NClaims) |
    MatchID | 0 | MatchID,
  data = BaselineData
)
cat("\n=== Specification 1: All Patents ===\n")
print(summary(m1))

# Specification 2: rows with baseline_quartile "Q0" (no digital disclosure)
m2 <- felm(
  log(MeanDistance + 1) ~ Focal + log(Citations) + log(NClaims) |
    MatchID | 0 | MatchID,
  data = subset(BaselineData, baseline_quartile == "Q0")
)
cat("\n=== Specification 2: No Disclosure (Q0) ===\n")
print(summary(m2))

# Specification 3: rows with baseline_quartile "Q1" or "Q2" (moderate)
m3 <- felm(
  log(MeanDistance + 1) ~ Focal + log(Citations) + log(NClaims) |
    MatchID | 0 | MatchID,
  data = subset(
    BaselineData,
    baseline_quartile == "Q1" | baseline_quartile == "Q2"
  )
)
cat("\n=== Specification 3: Moderate Disclosure (Q1+Q2) ===\n")
print(summary(m3))

# Specification 4: rows with baseline_quartile "Q3" or "Q4" (high)
m4 <- felm(
  log(MeanDistance + 1) ~ Focal + log(Citations) + log(NClaims) |
    MatchID | 0 | MatchID,
  data = subset(
    BaselineData,
    baseline_quartile == "Q3" | baseline_quartile == "Q4"
  )
)
cat("\n=== Specification 4: High Disclosure (Q3+Q4) ===\n")
print(summary(m4))


# =============================================================================
# TABLE: Permissionless Channel Test (License Effect) - Section 3.1
# Asks whether a permissive license amplifies the diffusion effect by adding
# a Focal x IsPermissive interaction to the baseline specification.
# IsPermissive is read from BaselineData (MainS3.csv); no extra file needed.
# =============================================================================

cat("\n--- Table: Permissionless Channel Test (License Effect) ---\n")

# Estimated on the High Disclosure sample only (baseline_quartile Q3 or Q4),
# with MatchID fixed effects and standard errors clustered on MatchID.
# NOTE(review): IsPermissive enters only through the interaction; presumably
# its main effect is absorbed by the MatchID fixed effects - confirm.
license_test <- felm(
  log(MeanDistance + 1) ~
    Focal + Focal:IsPermissive + log(Citations) + log(NClaims) |
    MatchID | 0 | MatchID,
  data = subset(
    BaselineData,
    baseline_quartile == "Q3" | baseline_quartile == "Q4"
  )
)

# Two-line header (title plus a one-line description), then the fit summary.
cat(
  "\n=== License Effect: High Disclosure (Q3+Q4) ===\n",
  "Tests whether permissive licenses amplify geographic diffusion\n",
  sep = ""
)
print(summary(license_test))


# =============================================================================
# TABLE: Median Distance (Appendix A5)
# Same four samples and the same right-hand side as the mean-distance table;
# the only change is the outcome, log(MedianDistance + 1).
# MatchID fixed effects, standard errors clustered on MatchID.
# =============================================================================

cat("\n--- Table: Median Distance Regressions (A5) ---\n")

# Specification 1: full baseline sample
m1_med <- felm(
  log(MedianDistance + 1) ~ Focal + log(Citations) + log(NClaims) |
    MatchID | 0 | MatchID,
  data = BaselineData
)
cat("\n=== Specification 1: All Patents (Median) ===\n")
print(summary(m1_med))

# Specification 2: no digital disclosure (baseline_quartile "Q0")
m2_med <- felm(
  log(MedianDistance + 1) ~ Focal + log(Citations) + log(NClaims) |
    MatchID | 0 | MatchID,
  data = subset(BaselineData, baseline_quartile == "Q0")
)
cat("\n=== Specification 2: No Disclosure (Median) ===\n")
print(summary(m2_med))

# Specification 3: moderate digital disclosure (baseline_quartile "Q1"/"Q2")
m3_med <- felm(
  log(MedianDistance + 1) ~ Focal + log(Citations) + log(NClaims) |
    MatchID | 0 | MatchID,
  data = subset(
    BaselineData,
    baseline_quartile == "Q1" | baseline_quartile == "Q2"
  )
)
cat("\n=== Specification 3: Moderate Disclosure (Median) ===\n")
print(summary(m3_med))

# Specification 4: high digital disclosure (baseline_quartile "Q3"/"Q4")
m4_med <- felm(
  log(MedianDistance + 1) ~ Focal + log(Citations) + log(NClaims) |
    MatchID | 0 | MatchID,
  data = subset(
    BaselineData,
    baseline_quartile == "Q3" | baseline_quartile == "Q4"
  )
)
cat("\n=== Specification 4: High Disclosure (Median) ===\n")
print(summary(m4_med))


# =============================================================================
# TABLE: Quartile-Specific Regression (Appendix A5)
# Re-estimates the mean-distance specification separately for each value of
# baseline_quartile (Q0 through Q4) instead of pooling Q1+Q2 and Q3+Q4.
# MatchID fixed effects, standard errors clustered on MatchID.
# =============================================================================

cat("\n--- Table: Quartile-Specific Regression (A5) ---\n")

# Q0: no code found, i.e. the No Disclosure class
m_q0 <- felm(
  log(MeanDistance + 1) ~ Focal + log(Citations) + log(NClaims) |
    MatchID | 0 | MatchID,
  data = subset(BaselineData, baseline_quartile == "Q0")
)
cat("\n=== Q0: No Code Found (No Disclosure) ===\n")
print(summary(m_q0))

# Q1: first of the two Moderate Disclosure quartiles
m_q1 <- felm(
  log(MeanDistance + 1) ~ Focal + log(Citations) + log(NClaims) |
    MatchID | 0 | MatchID,
  data = subset(BaselineData, baseline_quartile == "Q1")
)
cat("\n=== Q1 (Moderate Disclosure) ===\n")
print(summary(m_q1))

# Q2: second of the two Moderate Disclosure quartiles
m_q2 <- felm(
  log(MeanDistance + 1) ~ Focal + log(Citations) + log(NClaims) |
    MatchID | 0 | MatchID,
  data = subset(BaselineData, baseline_quartile == "Q2")
)
cat("\n=== Q2 (Moderate Disclosure) ===\n")
print(summary(m_q2))

# Q3: first of the two High Disclosure quartiles
m_q3 <- felm(
  log(MeanDistance + 1) ~ Focal + log(Citations) + log(NClaims) |
    MatchID | 0 | MatchID,
  data = subset(BaselineData, baseline_quartile == "Q3")
)
cat("\n=== Q3 (High Disclosure) ===\n")
print(summary(m_q3))

# Q4: second of the two High Disclosure quartiles
m_q4 <- felm(
  log(MeanDistance + 1) ~ Focal + log(Citations) + log(NClaims) |
    MatchID | 0 | MatchID,
  data = subset(BaselineData, baseline_quartile == "Q4")
)
cat("\n=== Q4 (High Disclosure) ===\n")
print(summary(m_q4))


# Section banner; with sep = "" the three pieces are emitted back to back.
cat(
  "\n===========================================\n",
  "SECTION 3.1: CITATION-LEVEL REGRESSIONS\n",
  "===========================================\n",
  sep = ""
)

# =============================================================================
# TABLE: Citation-Level Analysis with Exposure
# Outcome is log(MeanDistance + 1) from CitationData. Every model absorbs
# UniqueCitation and FLag fixed effects and clusters standard errors on
# UniqueCitation. Samples are selected with the NoDisclosure and
# HighDisclosure columns; the Exposure column enters only the two
# No Disclosure models, as an interaction with Focal.
# =============================================================================

cat("\n--- Table: Citation-Level Regressions ---\n")

# Coerce Focal to numeric before it is used as a regressor and inside the
# Focal:Exposure interactions below.
CitationData$Focal <- as.numeric(CitationData$Focal)

# Specification 1: No Disclosure and High Disclosure rows pooled
c1 <- felm(
  log(MeanDistance + 1) ~ Focal |
    UniqueCitation + FLag | 0 | UniqueCitation,
  data = subset(CitationData, NoDisclosure | HighDisclosure)
)
cat("\n=== Specification 1: All (No/High Disclosure) ===\n")
print(summary(c1))

# Specification 2: High Disclosure rows only
c2 <- felm(
  log(MeanDistance + 1) ~ Focal |
    UniqueCitation + FLag | 0 | UniqueCitation,
  data = subset(CitationData, HighDisclosure)
)
cat("\n=== Specification 2: High Disclosure Only ===\n")
print(summary(c2))

# Specification 3: No Disclosure rows, Focal interacted with Exposure as-is
c3 <- felm(
  log(MeanDistance + 1) ~ Focal + Focal:Exposure |
    UniqueCitation + FLag | 0 | UniqueCitation,
  data = subset(CitationData, NoDisclosure)
)
cat("\n=== Specification 3: No Disclosure with Exposure (Similarity) ===\n")
print(summary(c3))

# Specification 4: No Disclosure rows, Focal interacted with the dummy
# I(Exposure > 0), i.e. any positive exposure
c4 <- felm(
  log(MeanDistance + 1) ~ Focal + Focal:I(Exposure > 0) |
    UniqueCitation + FLag | 0 | UniqueCitation,
  data = subset(CitationData, NoDisclosure)
)
cat("\n=== Specification 4: No Disclosure with Exposure (Dummy) ===\n")
print(summary(c4))


